import pandas as pd
from tqdm import tqdm
from gensim.models.fasttext import load_facebook_vectors
from pathlib import Path

# Register tqdm's progress-bar variants (e.g. ``progress_apply``) on pandas objects.
tqdm.pandas()

# Absolute directory containing this script; every data path in this file is built relative to it.
script_dir = Path(__file__).resolve().parent


def load_sentences():
    """Read the manifesto corpus feather file into a DataFrame.

    The file is looked up at ``../data/source/manifesto_corpus.feather``
    relative to this script's directory. Other functions in this file read
    the ``text``, ``text_en``, ``language_iso`` and ``id_for_project``
    columns from the returned frame.
    """
    corpus_path = script_dir / "../data/source/manifesto_corpus.feather"
    corpus = pd.read_feather(corpus_path)
    return corpus


def load_model(target_language: str):
    """Load the fastText vectors stored for ``target_language``.

    The binary is expected at
    ``../data/ddr/embeddings/fasttext_model_<target_language>wiki.bin``
    relative to this script's directory.

    Args:
        target_language: Language code embedded in the model file name
            (e.g. ``"en"``).

    Returns:
        Whatever ``gensim``'s ``load_facebook_vectors`` returns for that file.
    """
    model_file = f"../data/ddr/embeddings/fasttext_model_{target_language}wiki.bin"
    model_path = script_dir / model_file
    return load_facebook_vectors(model_path)


def get_seed_words(target_language):
    """Collect the seed-word lists defined for one language.

    Reads ``../data/ddr/ddr_seed_words.xlsx`` (relative to this script),
    keeps the rows whose ``LANGUAGE`` equals ``target_language`` and emits one
    entry per value in their ``CATEGORY`` column.

    Args:
        target_language: Value to match against the ``LANGUAGE`` column.

    Returns:
        A list of dicts, each with ``"name"`` (the category) and ``"words"``
        (the values at column positions 2 through 5 of the first row that
        carries that category).
    """
    seed_table = pd.read_excel(script_dir / "../data/ddr/ddr_seed_words.xlsx")
    language_rows = seed_table[seed_table["LANGUAGE"] == target_language].copy()

    def words_for(category):
        # Only the first row matching the category is consulted; the slice is
        # positional, so it depends on the column order of the spreadsheet.
        first_match = language_rows[language_rows["CATEGORY"] == category].iloc[0]
        return list(first_match[2:6])

    return [
        {"name": category, "words": words_for(category)}
        for category in language_rows["CATEGORY"]
    ]


def apply_ddr(input, model, seed_words, strategy):
    """Score one row of text against every seed-word list.

    Args:
        input: Mapping-like row; ``input["text_en"]`` is scored when
            ``strategy`` is ``"all_en"``, otherwise ``input["text"]``.
        model: Object exposing ``n_similarity(keys_a, keys_b)``.
        seed_words: Iterable of dicts with ``"name"`` and ``"words"`` keys.
        strategy: Label that selects the text column and is embedded in the
            output keys.

    Returns:
        A dict mapping ``"ddr_<strategy>_<name>"`` to the value returned by
        ``model.n_similarity`` for that seed-word list.
    """
    text_column = "text_en" if strategy == "all_en" else "text"
    sentence = input[text_column]

    # NOTE(review): the sentence is handed over as a single key rather than a
    # list of tokens -- confirm this is the intended way to embed it.
    return {
        f"ddr_{strategy}_{foundation['name']}": model.n_similarity(
            [sentence],
            foundation["words"],
        )
        for foundation in seed_words
    }


def score_all_en():
    """Score the English text of every sentence and write ``all_en.feather``.

    Loads the corpus, the ``"en"`` model and the ``"en"`` seed words, applies
    ``apply_ddr`` row by row with the ``"all_en"`` strategy, appends the
    ``id_for_project`` column and stores the result under ``../data/ddr/``
    (relative to this script), creating that directory if needed.
    """
    print("Scoring all English translations")
    corpus = load_sentences()
    vectors = load_model(target_language="en")
    seeds = get_seed_words(target_language="en")

    # Keyword arguments forwarded to apply_ddr for every row.
    scoring_options = {
        "model": vectors,
        "seed_words": seeds,
        "strategy": "all_en",
    }
    scores = corpus.progress_apply(
        apply_ddr,
        axis="columns",
        result_type="expand",
        **scoring_options,
    )

    # Keep the project id next to the scores so rows stay identifiable.
    export_frame = pd.concat([scores, corpus["id_for_project"]], axis="columns")

    output_dir = Path(script_dir / "../data/ddr/")
    output_dir.mkdir(parents=True, exist_ok=True)

    # reset_index() moves the index into a regular column before writing.
    flattened = export_frame.reset_index()
    flattened.to_feather(script_dir / "../data/ddr/all_en.feather")


def score_a_language(language: str):
    """Score the original-language text of one language's sentences.

    Keeps the corpus rows whose ``language_iso`` equals ``language``, applies
    ``apply_ddr`` row by row with the ``"original_language"`` strategy using
    that language's model and seed words, appends ``id_for_project`` and
    writes ``../data/ddr/sentences_<language>.feather`` (relative to this
    script), creating the directory if needed.

    Args:
        language: Language code used to filter the corpus and to select the
            model, the seed words and the output file name.
    """
    print(f"Scoring all sentences in {language}.")
    corpus = load_sentences()
    in_language = corpus[corpus["language_iso"] == language]
    vectors = load_model(target_language=language)
    seeds = get_seed_words(target_language=language)

    # Keyword arguments forwarded to apply_ddr for every row.
    scoring_options = {
        "model": vectors,
        "seed_words": seeds,
        "strategy": "original_language",
    }
    scores = in_language.progress_apply(
        apply_ddr,
        axis="columns",
        result_type="expand",
        **scoring_options,
    )

    # Keep the project id next to the scores so rows stay identifiable.
    export_frame = pd.concat(
        [scores, in_language["id_for_project"]], axis="columns"
    )

    output_dir = Path(script_dir / "../data/ddr/")
    output_dir.mkdir(parents=True, exist_ok=True)

    # The filtered frame no longer has a default index; reset_index() moves it
    # into a regular column before writing.
    flattened = export_frame.reset_index()
    flattened.to_feather(script_dir / f"../data/ddr/sentences_{language}.feather")


if __name__ == "__main__":
    # First pass: score the English text of the whole corpus.
    score_all_en()

    # Second pass: score each language's sentences in their original text.
    languages = ["en", "de", "nl", "es"]
    for iso_code in languages:
        score_a_language(language=iso_code)
